{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "3aef0848d3fb4021a56543d6dc02ff1f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_76d1784ba96349639024bd220b80c400",
              "IPY_MODEL_dbfbbca09aa443ba9944d8eba9d49ab3",
              "IPY_MODEL_ee4cca0e6c204136984a5f9b258b122b"
            ],
            "layout": "IPY_MODEL_4094dbe404e14273a5d83ed2b54ab8b7"
          }
        },
        "76d1784ba96349639024bd220b80c400": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a0788d9d784d46d1b36f58aa112f1550",
            "placeholder": "​",
            "style": "IPY_MODEL_2360c6610f7d4783ba217b79cbc8313e",
            "value": "100%"
          }
        },
        "dbfbbca09aa443ba9944d8eba9d49ab3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8c11e99d43274804b4f755906d2d4375",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_73d0b51ab41c43c6a4266db5e3a533e0",
            "value": 1
          }
        },
        "ee4cca0e6c204136984a5f9b258b122b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_dd96dff0c6384f9486457707c44af57c",
            "placeholder": "​",
            "style": "IPY_MODEL_3dce3834b0d74f73aeef72f35fcf0b8a",
            "value": " 1/1 [00:03&lt;00:00,  3.49s/it]"
          }
        },
        "4094dbe404e14273a5d83ed2b54ab8b7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a0788d9d784d46d1b36f58aa112f1550": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2360c6610f7d4783ba217b79cbc8313e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8c11e99d43274804b4f755906d2d4375": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "73d0b51ab41c43c6a4266db5e3a533e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "dd96dff0c6384f9486457707c44af57c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3dce3834b0d74f73aeef72f35fcf0b8a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a071ae5df1c1410bb592d7f13ab45690": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_feb7bc471e9c4e43aca87a643cf8448a",
              "IPY_MODEL_193ec0cf8e3e40bab991e92d71e5ec49",
              "IPY_MODEL_662244593b244a4ebb1be545822d45cd"
            ],
            "layout": "IPY_MODEL_fcec271f0cac4fdeb29a272d5050bb44"
          }
        },
        "feb7bc471e9c4e43aca87a643cf8448a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_91843accc6124b329f403cee528f2dda",
            "placeholder": "​",
            "style": "IPY_MODEL_5a653d76b53a487086b8b2676325adcc",
            "value": "100%"
          }
        },
        "193ec0cf8e3e40bab991e92d71e5ec49": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_373cb4baf44d452a81aaaa6b8ad2ceab",
            "max": 6,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b2b7b9601bd34a6b9ec28eee4693b099",
            "value": 6
          }
        },
        "662244593b244a4ebb1be545822d45cd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d8408bdcb8b14e97a4119c519a4ebe1c",
            "placeholder": "​",
            "style": "IPY_MODEL_3d2b71d9e8294d2ab36c3dc4f155877f",
            "value": " 6/6 [00:04&lt;00:00,  1.90it/s]"
          }
        },
        "fcec271f0cac4fdeb29a272d5050bb44": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "91843accc6124b329f403cee528f2dda": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5a653d76b53a487086b8b2676325adcc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "373cb4baf44d452a81aaaa6b8ad2ceab": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b2b7b9601bd34a6b9ec28eee4693b099": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "d8408bdcb8b14e97a4119c519a4ebe1c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3d2b71d9e8294d2ab36c3dc4f155877f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "841c3c9f707041de8e77f6c198acf45c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_7648691a402e4e719e6d1b149046f1be",
              "IPY_MODEL_f0fa4263243a4134afbbd241d357d873",
              "IPY_MODEL_dc8b702d1c774c9faf05e65c1ec519e0"
            ],
            "layout": "IPY_MODEL_870f9567682c43f8b43311a4d4e3a41d"
          }
        },
        "7648691a402e4e719e6d1b149046f1be": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c68ee81ae4f048a385960db78b3b18c2",
            "placeholder": "​",
            "style": "IPY_MODEL_de274a0c13c847e2a3146da5a5e30c2b",
            "value": "Downloading artifacts: 100%"
          }
        },
        "f0fa4263243a4134afbbd241d357d873": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_51ac93a4cd284d61b5463647e09fd152",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_78de80cdcee14f61ad27cc4b31ad1384",
            "value": 1
          }
        },
        "dc8b702d1c774c9faf05e65c1ec519e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ced1d46665f84a72be4f0209061de614",
            "placeholder": "​",
            "style": "IPY_MODEL_546c75b9414f47a38b8855acc1db7724",
            "value": " 1/1 [00:00&lt;00:00, 45.86it/s]"
          }
        },
        "870f9567682c43f8b43311a4d4e3a41d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c68ee81ae4f048a385960db78b3b18c2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "de274a0c13c847e2a3146da5a5e30c2b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "51ac93a4cd284d61b5463647e09fd152": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "78de80cdcee14f61ad27cc4b31ad1384": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ced1d46665f84a72be4f0209061de614": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "546c75b9414f47a38b8855acc1db7724": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "49b6c3af203648b1a8cfcc1a21086b97": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_df7aeb3b7fe54fd2859eafe39bda3ad2",
              "IPY_MODEL_fd7ace7567d04cd1ac517f39e2adfdd8",
              "IPY_MODEL_8a433652c0e641d4b932f6f325acba3c"
            ],
            "layout": "IPY_MODEL_fc4eae37f90c480b8e5fe936ec54bf2b"
          }
        },
        "df7aeb3b7fe54fd2859eafe39bda3ad2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e7a7a18ebf7247b689f372fff06a68e8",
            "placeholder": "​",
            "style": "IPY_MODEL_b5ef3be95a204763b3001dc540d9ca26",
            "value": "Downloading artifacts: 100%"
          }
        },
        "fd7ace7567d04cd1ac517f39e2adfdd8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e1d64167d14d41a989df6b8bf4907462",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_5732cb24798c487c8b1724c14fb05343",
            "value": 1
          }
        },
        "8a433652c0e641d4b932f6f325acba3c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_42fcf96e77d34799a874c924b5a6f46c",
            "placeholder": "​",
            "style": "IPY_MODEL_ac1f0ab7f4fe4d3ea178b3efdb3a86b6",
            "value": " 1/1 [00:00&lt;00:00, 69.87it/s]"
          }
        },
        "fc4eae37f90c480b8e5fe936ec54bf2b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e7a7a18ebf7247b689f372fff06a68e8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b5ef3be95a204763b3001dc540d9ca26": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e1d64167d14d41a989df6b8bf4907462": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5732cb24798c487c8b1724c14fb05343": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "42fcf96e77d34799a874c924b5a6f46c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ac1f0ab7f4fe4d3ea178b3efdb3a86b6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Getting Started with MLFlow for LLM Evaluation\n",
        "MLflow is a powerful open-source platform for managing the machine learning lifecycle. While it's traditionally used for tracking model experiments, logging parameters, and managing deployments, MLflow has recently introduced support for evaluating Large Language Models (LLMs).\n",
        "\n",
        "In this tutorial, we explore how to use MLflow to evaluate the performance of an LLM—in our case, Google's Gemini model—on a set of fact-based prompts. We'll generate responses to fact-based prompts using Gemini and assess their quality using a variety of metrics supported directly by MLflow."
      ],
      "metadata": {
        "id": "4yFa5qLUeshB"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 1. Setting up the dependencies\n",
        "For this tutorial, we’ll be using both the OpenAI and Gemini APIs. MLflow’s built-in generative AI evaluation metrics currently rely on OpenAI models (e.g., GPT-4) to act as judges for metrics like answer similarity or faithfulness, so an OpenAI API key is required. You can obtain:\n",
        "\n",
        "* Your OpenAI API key from https://platform.openai.com/settings/organization/api-keys\n",
        "\n",
        "* Your Google Gemini API key from https://ai.google.dev/gemini-api/docs"
      ],
      "metadata": {
        "id": "kEsLmlqRBsDu"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "### 1.1 Installing the libraries"
      ],
      "metadata": {
        "id": "Hw4bKpd9CWBr"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "collapsed": true,
        "id": "DwYdC3JeLYeC",
        "outputId": "eb77d189-56c0-4a9e-f8b0-efea36b8d7b6"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: mlflow in /usr/local/lib/python3.11/dist-packages (3.1.1)\n",
            "Requirement already satisfied: openai in /usr/local/lib/python3.11/dist-packages (1.91.0)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n",
            "Requirement already satisfied: google-genai in /usr/local/lib/python3.11/dist-packages (1.21.1)\n",
            "Requirement already satisfied: mlflow-skinny==3.1.1 in /usr/local/lib/python3.11/dist-packages (from mlflow) (3.1.1)\n",
            "Requirement already satisfied: Flask<4 in /usr/local/lib/python3.11/dist-packages (from mlflow) (3.1.1)\n",
            "Requirement already satisfied: alembic!=1.10.0,<2 in /usr/local/lib/python3.11/dist-packages (from mlflow) (1.16.2)\n",
            "Requirement already satisfied: docker<8,>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from mlflow) (7.1.0)\n",
            "Requirement already satisfied: graphene<4 in /usr/local/lib/python3.11/dist-packages (from mlflow) (3.4.3)\n",
            "Requirement already satisfied: gunicorn<24 in /usr/local/lib/python3.11/dist-packages (from mlflow) (23.0.0)\n",
            "Requirement already satisfied: matplotlib<4 in /usr/local/lib/python3.11/dist-packages (from mlflow) (3.10.0)\n",
            "Requirement already satisfied: numpy<3 in /usr/local/lib/python3.11/dist-packages (from mlflow) (2.0.2)\n",
            "Requirement already satisfied: pyarrow<21,>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from mlflow) (18.1.0)\n",
            "Requirement already satisfied: scikit-learn<2 in /usr/local/lib/python3.11/dist-packages (from mlflow) (1.6.1)\n",
            "Requirement already satisfied: scipy<2 in /usr/local/lib/python3.11/dist-packages (from mlflow) (1.15.3)\n",
            "Requirement already satisfied: sqlalchemy<3,>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from mlflow) (2.0.41)\n",
            "Requirement already satisfied: cachetools<7,>=5.0.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (5.5.2)\n",
            "Requirement already satisfied: click<9,>=7.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (8.2.1)\n",
            "Requirement already satisfied: cloudpickle<4 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (3.1.1)\n",
            "Requirement already satisfied: databricks-sdk<1,>=0.20.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (0.57.0)\n",
            "Requirement already satisfied: fastapi<1 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (0.115.13)\n",
            "Requirement already satisfied: gitpython<4,>=3.1.9 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (3.1.44)\n",
            "Requirement already satisfied: importlib_metadata!=4.7.0,<9,>=3.7.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (8.7.0)\n",
            "Requirement already satisfied: opentelemetry-api<3,>=1.9.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (1.34.1)\n",
            "Requirement already satisfied: opentelemetry-sdk<3,>=1.9.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (1.34.1)\n",
            "Requirement already satisfied: packaging<26 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (24.2)\n",
            "Requirement already satisfied: protobuf<7,>=3.12.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (5.29.5)\n",
            "Requirement already satisfied: pydantic<3,>=1.10.8 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (2.11.7)\n",
            "Requirement already satisfied: pyyaml<7,>=5.1 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (6.0.2)\n",
            "Requirement already satisfied: requests<3,>=2.17.3 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (2.32.3)\n",
            "Requirement already satisfied: sqlparse<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (0.5.3)\n",
            "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (4.14.0)\n",
            "Requirement already satisfied: uvicorn<1 in /usr/local/lib/python3.11/dist-packages (from mlflow-skinny==3.1.1->mlflow) (0.34.3)\n",
            "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai) (4.9.0)\n",
            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai) (1.9.0)\n",
            "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from openai) (0.28.1)\n",
            "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai) (0.10.0)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai) (1.3.1)\n",
            "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.11/dist-packages (from openai) (4.67.1)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.9.0.post0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n",
            "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /usr/local/lib/python3.11/dist-packages (from google-genai) (2.38.0)\n",
            "Requirement already satisfied: tenacity<9.0.0,>=8.2.3 in /usr/local/lib/python3.11/dist-packages (from google-genai) (8.5.0)\n",
            "Requirement already satisfied: websockets<15.1.0,>=13.0.0 in /usr/local/lib/python3.11/dist-packages (from google-genai) (15.0.1)\n",
            "Requirement already satisfied: Mako in /usr/lib/python3/dist-packages (from alembic!=1.10.0,<2->mlflow) (1.1.3)\n",
            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5,>=3.5.0->openai) (3.10)\n",
            "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.11/dist-packages (from docker<8,>=4.0.0->mlflow) (2.4.0)\n",
            "Requirement already satisfied: blinker>=1.9.0 in /usr/local/lib/python3.11/dist-packages (from Flask<4->mlflow) (1.9.0)\n",
            "Requirement already satisfied: itsdangerous>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from Flask<4->mlflow) (2.2.0)\n",
            "Requirement already satisfied: jinja2>=3.1.2 in /usr/local/lib/python3.11/dist-packages (from Flask<4->mlflow) (3.1.6)\n",
            "Requirement already satisfied: markupsafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from Flask<4->mlflow) (3.0.2)\n",
            "Requirement already satisfied: werkzeug>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from Flask<4->mlflow) (3.1.3)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from google-auth<3.0.0,>=2.14.1->google-genai) (0.4.2)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.11/dist-packages (from google-auth<3.0.0,>=2.14.1->google-genai) (4.9.1)\n",
            "Requirement already satisfied: graphql-core<3.3,>=3.1 in /usr/local/lib/python3.11/dist-packages (from graphene<4->mlflow) (3.2.6)\n",
            "Requirement already satisfied: graphql-relay<3.3,>=3.1 in /usr/local/lib/python3.11/dist-packages (from graphene<4->mlflow) (3.2.0)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai) (2025.6.15)\n",
            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai) (1.0.9)\n",
            "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.16.0)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (1.3.2)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (0.12.1)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (4.58.4)\n",
            "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (1.4.8)\n",
            "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (11.2.1)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib<4->mlflow) (3.2.3)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow) (2.33.2)\n",
            "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow) (0.4.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.17.3->mlflow-skinny==3.1.1->mlflow) (3.4.2)\n",
            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn<2->mlflow) (1.5.1)\n",
            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn<2->mlflow) (3.6.0)\n",
            "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.11/dist-packages (from sqlalchemy<3,>=1.4.0->mlflow) (3.2.3)\n",
            "Requirement already satisfied: starlette<0.47.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from fastapi<1->mlflow-skinny==3.1.1->mlflow) (0.46.2)\n",
            "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython<4,>=3.1.9->mlflow-skinny==3.1.1->mlflow) (4.0.12)\n",
            "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/dist-packages (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.1.1->mlflow) (3.23.0)\n",
            "Requirement already satisfied: opentelemetry-semantic-conventions==0.55b1 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.1.1->mlflow) (0.55b1)\n",
            "Requirement already satisfied: pyasn1<0.7.0,>=0.6.1 in /usr/local/lib/python3.11/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.0,>=2.14.1->google-genai) (0.6.1)\n",
            "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.1.1->mlflow) (5.0.2)\n"
          ]
        }
      ],
      "source": [
        "!pip install mlflow openai pandas google-genai"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### 1.2 Setting the OpenAI and Google API Keys as environment variable"
      ],
      "metadata": {
        "id": "wjy_uYgNCZDw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "os.environ[\"OPENAI_API_KEY\"] = getpass('Enter OpenAI API Key:')\n",
        "os.environ[\"GOOGLE_API_KEY\"] = getpass('Enter Google API Key:')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BDRWJRF5LkHE",
        "outputId": "8307f8d4-5a60-44bb-c815-5a031b794707"
      },
      "execution_count": 2,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Enter OpenAI API Key:··········\n",
            "Enter Google API Key:··········\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 2. Preparing Evaluation Data and Fetching Outputs from Gemini"
      ],
      "metadata": {
        "id": "rPGraTQNCg33"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "### 2.1 Importing the libraries"
      ],
      "metadata": {
        "id": "6IhxurXtCtFS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import mlflow\n",
        "import openai\n",
        "import os\n",
        "import pandas as pd\n",
        "from google import genai"
      ],
      "metadata": {
        "id": "hubxBTYvLv-i"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### 2.2 Creating the evaluation data\n",
        "In this step, we define a small evaluation dataset containing factual prompts along with their correct ground truth answers. These prompts span topics such as science, health, web development, and programming. This structured format allows us to objectively compare the Gemini-generated responses against known correct answers using various evaluation metrics in MLflow."
      ],
      "metadata": {
        "id": "5dbr-HKGCvwy"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "eval_data = pd.DataFrame(\n",
        "    {\n",
        "        \"inputs\": [\n",
        "            \"Who developed the theory of general relativity?\",\n",
        "            \"What are the primary functions of the liver in the human body?\",\n",
        "            \"Explain what HTTP status code 404 means.\",\n",
        "            \"What is the boiling point of water at sea level in Celsius?\",\n",
        "            \"Name the largest planet in our solar system.\",\n",
        "            \"What programming language is primarily used for developing iOS apps?\",\n",
        "        ],\n",
        "        \"ground_truth\": [\n",
        "            \"Albert Einstein developed the theory of general relativity.\",\n",
        "            \"The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.\",\n",
        "            \"HTTP 404 means 'Not Found' — the server can't find the requested resource.\",\n",
        "            \"The boiling point of water at sea level is 100 degrees Celsius.\",\n",
        "            \"Jupiter is the largest planet in our solar system.\",\n",
        "            \"Swift is the primary programming language used for iOS app development.\"\n",
        "        ]\n",
        "    }\n",
        ")\n",
        "\n",
        "eval_data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 238
        },
        "id": "iFb18mr0LyCi",
        "outputId": "c5180208-1d4f-4d02-c866-75d35f96063d"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                              inputs  \\\n",
              "0    Who developed the theory of general relativity?   \n",
              "1  What are the primary functions of the liver in...   \n",
              "2           Explain what HTTP status code 404 means.   \n",
              "3  What is the boiling point of water at sea leve...   \n",
              "4       Name the largest planet in our solar system.   \n",
              "5  What programming language is primarily used fo...   \n",
              "\n",
              "                                        ground_truth  \n",
              "0  Albert Einstein developed the theory of genera...  \n",
              "1  The liver helps in detoxification, protein syn...  \n",
              "2  HTTP 404 means 'Not Found' — the server can't ...  \n",
              "3  The boiling point of water at sea level is 100...  \n",
              "4  Jupiter is the largest planet in our solar sys...  \n",
              "5  Swift is the primary programming language used...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-fc25da67-4b36-4bfa-8f2b-2bf9a7eb426e\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>inputs</th>\n",
              "      <th>ground_truth</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Who developed the theory of general relativity?</td>\n",
              "      <td>Albert Einstein developed the theory of genera...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>What are the primary functions of the liver in...</td>\n",
              "      <td>The liver helps in detoxification, protein syn...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Explain what HTTP status code 404 means.</td>\n",
              "      <td>HTTP 404 means 'Not Found' — the server can't ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>What is the boiling point of water at sea leve...</td>\n",
              "      <td>The boiling point of water at sea level is 100...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Name the largest planet in our solar system.</td>\n",
              "      <td>Jupiter is the largest planet in our solar sys...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>What programming language is primarily used fo...</td>\n",
              "      <td>Swift is the primary programming language used...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fc25da67-4b36-4bfa-8f2b-2bf9a7eb426e')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-fc25da67-4b36-4bfa-8f2b-2bf9a7eb426e button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-fc25da67-4b36-4bfa-8f2b-2bf9a7eb426e');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-39207e83-9591-4565-9598-205d2a1f1c70\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-39207e83-9591-4565-9598-205d2a1f1c70')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-39207e83-9591-4565-9598-205d2a1f1c70 button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "  <div id=\"id_38e035d8-e22d-43ac-87e5-4684a7099ac3\">\n",
              "    <style>\n",
              "      .colab-df-generate {\n",
              "        background-color: #E8F0FE;\n",
              "        border: none;\n",
              "        border-radius: 50%;\n",
              "        cursor: pointer;\n",
              "        display: none;\n",
              "        fill: #1967D2;\n",
              "        height: 32px;\n",
              "        padding: 0 0 0 0;\n",
              "        width: 32px;\n",
              "      }\n",
              "\n",
              "      .colab-df-generate:hover {\n",
              "        background-color: #E2EBFA;\n",
              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "        fill: #174EA6;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate {\n",
              "        background-color: #3B4455;\n",
              "        fill: #D2E3FC;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate:hover {\n",
              "        background-color: #434B5C;\n",
              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "        fill: #FFFFFF;\n",
              "      }\n",
              "    </style>\n",
              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('eval_data')\"\n",
              "            title=\"Generate code using this dataframe.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "    <script>\n",
              "      (() => {\n",
              "      const buttonEl =\n",
              "        document.querySelector('#id_38e035d8-e22d-43ac-87e5-4684a7099ac3 button.colab-df-generate');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      buttonEl.onclick = () => {\n",
              "        google.colab.notebook.generateWithVariable('eval_data');\n",
              "      }\n",
              "      })();\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "eval_data",
              "summary": "{\n  \"name\": \"eval_data\",\n  \"rows\": 6,\n  \"fields\": [\n    {\n      \"column\": \"inputs\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Who developed the theory of general relativity?\",\n          \"What are the primary functions of the liver in the human body?\",\n          \"What programming language is primarily used for developing iOS apps?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ground_truth\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Albert Einstein developed the theory of general relativity.\",\n          \"The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.\",\n          \"Swift is the primary programming language used for iOS app development.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### 2.3 Getting Gemini Responses\n",
        "This code block defines a helper function gemini_completion() that sends a prompt to the Gemini 1.5 Flash model using the Google Generative AI SDK and returns the generated response as plain text. We then apply this function to each prompt in our evaluation dataset to generate the model's predictions, storing them in a new \"predictions\" column. These predictions will later be evaluated against the ground truth answers"
      ],
      "metadata": {
        "id": "Q1wURsFkC6ZI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "client = genai.Client()\n",
        "def gemini_completion(prompt: str) -> str:\n",
        "    response = client.models.generate_content(\n",
        "        model=\"gemini-1.5-flash\",\n",
        "        contents=prompt\n",
        "    )\n",
        "    return response.text.strip()\n",
        "\n",
        "eval_data[\"predictions\"] = eval_data[\"inputs\"].apply(gemini_completion)\n",
        "eval_data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 238
        },
        "id": "sbHyfta1Mm_3",
        "outputId": "22d14167-1382-4e08-fa90-aa4cef6cb8d3"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                              inputs  \\\n",
              "0    Who developed the theory of general relativity?   \n",
              "1  What are the primary functions of the liver in...   \n",
              "2           Explain what HTTP status code 404 means.   \n",
              "3  What is the boiling point of water at sea leve...   \n",
              "4       Name the largest planet in our solar system.   \n",
              "5  What programming language is primarily used fo...   \n",
              "\n",
              "                                        ground_truth  \\\n",
              "0  Albert Einstein developed the theory of genera...   \n",
              "1  The liver helps in detoxification, protein syn...   \n",
              "2  HTTP 404 means 'Not Found' — the server can't ...   \n",
              "3  The boiling point of water at sea level is 100...   \n",
              "4  Jupiter is the largest planet in our solar sys...   \n",
              "5  Swift is the primary programming language used...   \n",
              "\n",
              "                                         predictions  \n",
              "0  Albert Einstein developed the theory of genera...  \n",
              "1  The liver performs a vast array of crucial fun...  \n",
              "2  HTTP status code 404 means that the server cou...  \n",
              "3                                100 degrees Celsius  \n",
              "4                                            Jupiter  \n",
              "5  Swift is the primary programming language used...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-10d02038-6120-4e43-be2a-2c32378c5208\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>inputs</th>\n",
              "      <th>ground_truth</th>\n",
              "      <th>predictions</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Who developed the theory of general relativity?</td>\n",
              "      <td>Albert Einstein developed the theory of genera...</td>\n",
              "      <td>Albert Einstein developed the theory of genera...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>What are the primary functions of the liver in...</td>\n",
              "      <td>The liver helps in detoxification, protein syn...</td>\n",
              "      <td>The liver performs a vast array of crucial fun...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Explain what HTTP status code 404 means.</td>\n",
              "      <td>HTTP 404 means 'Not Found' — the server can't ...</td>\n",
              "      <td>HTTP status code 404 means that the server cou...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>What is the boiling point of water at sea leve...</td>\n",
              "      <td>The boiling point of water at sea level is 100...</td>\n",
              "      <td>100 degrees Celsius</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Name the largest planet in our solar system.</td>\n",
              "      <td>Jupiter is the largest planet in our solar sys...</td>\n",
              "      <td>Jupiter</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>What programming language is primarily used fo...</td>\n",
              "      <td>Swift is the primary programming language used...</td>\n",
              "      <td>Swift is the primary programming language used...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-10d02038-6120-4e43-be2a-2c32378c5208')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-10d02038-6120-4e43-be2a-2c32378c5208 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-10d02038-6120-4e43-be2a-2c32378c5208');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-3f53da9a-73de-4c70-a69f-742ebee4caea\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3f53da9a-73de-4c70-a69f-742ebee4caea')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-3f53da9a-73de-4c70-a69f-742ebee4caea button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "  <div id=\"id_b222e052-0d8d-4dd8-8a07-f630e2483813\">\n",
              "    <style>\n",
              "      .colab-df-generate {\n",
              "        background-color: #E8F0FE;\n",
              "        border: none;\n",
              "        border-radius: 50%;\n",
              "        cursor: pointer;\n",
              "        display: none;\n",
              "        fill: #1967D2;\n",
              "        height: 32px;\n",
              "        padding: 0 0 0 0;\n",
              "        width: 32px;\n",
              "      }\n",
              "\n",
              "      .colab-df-generate:hover {\n",
              "        background-color: #E2EBFA;\n",
              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "        fill: #174EA6;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate {\n",
              "        background-color: #3B4455;\n",
              "        fill: #D2E3FC;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate:hover {\n",
              "        background-color: #434B5C;\n",
              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "        fill: #FFFFFF;\n",
              "      }\n",
              "    </style>\n",
              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('eval_data')\"\n",
              "            title=\"Generate code using this dataframe.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "    <script>\n",
              "      (() => {\n",
              "      const buttonEl =\n",
              "        document.querySelector('#id_b222e052-0d8d-4dd8-8a07-f630e2483813 button.colab-df-generate');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      buttonEl.onclick = () => {\n",
              "        google.colab.notebook.generateWithVariable('eval_data');\n",
              "      }\n",
              "      })();\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "eval_data",
              "summary": "{\n  \"name\": \"eval_data\",\n  \"rows\": 6,\n  \"fields\": [\n    {\n      \"column\": \"inputs\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Who developed the theory of general relativity?\",\n          \"What are the primary functions of the liver in the human body?\",\n          \"What programming language is primarily used for developing iOS apps?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ground_truth\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Albert Einstein developed the theory of general relativity.\",\n          \"The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.\",\n          \"Swift is the primary programming language used for iOS app development.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"predictions\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Albert Einstein developed the theory of general relativity.\",\n          \"The liver performs a vast array of crucial functions in the human body.  These can be broadly categorized as follows:\\n\\n**1. Metabolism:**\\n\\n* **Carbohydrate metabolism:**  The liver regulates blood glucose levels by storing glucose as glycogen (glycogenesis), breaking down glycogen into glucose (glycogenolysis), and producing glucose from non-carbohydrate sources (gluconeogenesis).\\n* **Lipid metabolism:** The liver synthesizes cholesterol and lipoproteins, processes fatty acids, and helps in the breakdown of fats.\\n* **Protein metabolism:** The liver synthesizes many essential proteins, including albumin (important for blood osmotic pressure), clotting factors, and various enzymes. It also deaminates amino acids (removes nitrogen) and converts ammonia (toxic) to urea (less toxic for excretion by the kidneys).\\n\\n**2. Detoxification:**\\n\\n* **Drug metabolism:** The liver breaks down and metabolizes many medications and other substances, making them less harmful or easier to excrete.\\n* **Waste product removal:** The liver processes and eliminates various metabolic waste products, toxins, and drugs from the bloodstream.\\n* **Hormone metabolism:** The liver metabolizes and inactivates many hormones.\\n\\n**3. Excretion:**\\n\\n* **Bile production:** The liver produces bile, a fluid crucial for fat digestion and absorption in the intestines. Bile contains bile salts, bilirubin (a byproduct of hemoglobin breakdown), and cholesterol.  Bilirubin is then excreted in the feces.\\n\\n**4. Storage:**\\n\\n* **Glycogen storage:** As mentioned above, the liver stores glucose as glycogen.\\n* **Vitamin and mineral storage:**  The liver stores vitamins A, D, E, K, and B12, as well as iron.\\n\\n**5. Synthesis:**\\n\\n* **Protein synthesis:** The liver synthesizes many essential proteins vital for various bodily functions.\\n* **Bile synthesis:**  As mentioned above.\\n* **Plasma protein synthesis:**  Including albumin and clotting factors.\\n\\n\\nThese are the primary functions; the liver's roles are interconnected and essential for maintaining overall health and homeostasis.  Damage to the liver can have severe and wide-ranging consequences throughout the body.\",\n          \"Swift is the primary programming language used for developing iOS apps.  While Objective-C was used extensively in the past, Swift is now Apple's preferred language for iOS, macOS, watchOS, and tvOS development.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 3. Evaluating Gemini Outputs with MLflow\n",
        "In this step, we initiate an MLflow run to evaluate the responses generated by the Gemini model against a set of factual ground-truth answers. We use the mlflow.evaluate() method with four lightweight metrics: **answer_similarity** (measuring semantic similarity between the model’s output and the ground truth), **exact_match** (checking for word-for-word matches), **latency** (tracking response generation time), and **token_count** (logging the number of output tokens).\n",
        "\n",
        "It's important to note that the **answer_similarity** metric internally uses OpenAI’s GPT model to judge the semantic closeness between answers, which is why access to the OpenAI API is required. This setup provides an efficient way to assess LLM outputs without relying on custom evaluation logic. The final evaluation results are printed and also saved to a CSV file for later inspection or visualization."
      ],
      "metadata": {
        "id": "qPc8_H44DNJ3"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "mlflow.set_tracking_uri(\"mlruns\")\n",
        "mlflow.set_experiment(\"Gemini Simple Metrics Eval\")\n",
        "\n",
        "with mlflow.start_run():\n",
        "    results = mlflow.evaluate(\n",
        "        model_type=\"question-answering\",\n",
        "        data=eval_data,\n",
        "        predictions=\"predictions\",\n",
        "        targets=\"ground_truth\",\n",
        "        extra_metrics=[\n",
        "          mlflow.metrics.genai.answer_similarity(),\n",
        "          mlflow.metrics.exact_match(),\n",
        "          mlflow.metrics.latency(),\n",
        "          mlflow.metrics.token_count()\n",
        "      ]\n",
        "    )\n",
        "    print(\"Aggregated Metrics:\")\n",
        "    print(results.metrics)\n",
        "\n",
        "    # Save detailed table\n",
        "    results.tables[\"eval_results_table\"].to_csv(\"gemini_eval_results.csv\", index=False)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 443,
          "referenced_widgets": [
            "3aef0848d3fb4021a56543d6dc02ff1f",
            "76d1784ba96349639024bd220b80c400",
            "dbfbbca09aa443ba9944d8eba9d49ab3",
            "ee4cca0e6c204136984a5f9b258b122b",
            "4094dbe404e14273a5d83ed2b54ab8b7",
            "a0788d9d784d46d1b36f58aa112f1550",
            "2360c6610f7d4783ba217b79cbc8313e",
            "8c11e99d43274804b4f755906d2d4375",
            "73d0b51ab41c43c6a4266db5e3a533e0",
            "dd96dff0c6384f9486457707c44af57c",
            "3dce3834b0d74f73aeef72f35fcf0b8a",
            "a071ae5df1c1410bb592d7f13ab45690",
            "feb7bc471e9c4e43aca87a643cf8448a",
            "193ec0cf8e3e40bab991e92d71e5ec49",
            "662244593b244a4ebb1be545822d45cd",
            "fcec271f0cac4fdeb29a272d5050bb44",
            "91843accc6124b329f403cee528f2dda",
            "5a653d76b53a487086b8b2676325adcc",
            "373cb4baf44d452a81aaaa6b8ad2ceab",
            "b2b7b9601bd34a6b9ec28eee4693b099",
            "d8408bdcb8b14e97a4119c519a4ebe1c",
            "3d2b71d9e8294d2ab36c3dc4f155877f",
            "841c3c9f707041de8e77f6c198acf45c",
            "7648691a402e4e719e6d1b149046f1be",
            "f0fa4263243a4134afbbd241d357d873",
            "dc8b702d1c774c9faf05e65c1ec519e0",
            "870f9567682c43f8b43311a4d4e3a41d",
            "c68ee81ae4f048a385960db78b3b18c2",
            "de274a0c13c847e2a3146da5a5e30c2b",
            "51ac93a4cd284d61b5463647e09fd152",
            "78de80cdcee14f61ad27cc4b31ad1384",
            "ced1d46665f84a72be4f0209061de614",
            "546c75b9414f47a38b8855acc1db7724",
            "49b6c3af203648b1a8cfcc1a21086b97",
            "df7aeb3b7fe54fd2859eafe39bda3ad2",
            "fd7ace7567d04cd1ac517f39e2adfdd8",
            "8a433652c0e641d4b932f6f325acba3c",
            "fc4eae37f90c480b8e5fe936ec54bf2b",
            "e7a7a18ebf7247b689f372fff06a68e8",
            "b5ef3be95a204763b3001dc540d9ca26",
            "e1d64167d14d41a989df6b8bf4907462",
            "5732cb24798c487c8b1724c14fb05343",
            "42fcf96e77d34799a874c924b5a6f46c",
            "ac1f0ab7f4fe4d3ea178b3efdb3a86b6"
          ]
        },
        "id": "vUkIqII5btVs",
        "outputId": "1e0e3414-a78a-4b7e-c2ea-56fa3235ad80"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2025/06/26 18:18:38 WARNING mlflow.models.evaluation.evaluators.default: Setting the latency to 0 for all entries because the model is not provided.\n",
            "2025/06/26 18:18:38 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...\n",
            "2025/06/26 18:18:39 WARNING mlflow.metrics.metric_definitions: Failed to load 'toxicity' metric (error: ModuleNotFoundError(\"No module named 'evaluate'\")), skipping metric logging.\n",
            "2025/06/26 18:18:39 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'toxicity' at index 1 in the `extra_metrics` parameter because it returned None.\n",
            "2025/06/26 18:18:39 WARNING mlflow.metrics.metric_definitions: Failed to import textstat for flesch kincaid metric, skipping metric logging. Please install textstat using 'pip install textstat'.\n",
            "2025/06/26 18:18:39 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'flesch_kincaid_grade_level' at index 2 in the `extra_metrics` parameter because it returned None.\n",
            "2025/06/26 18:18:39 WARNING mlflow.metrics.metric_definitions: Failed to import textstat for automated readability index metric, skipping metric logging. Please install textstat using 'pip install textstat'.\n",
            "2025/06/26 18:18:39 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'ari_grade_level' at index 3 in the `extra_metrics` parameter because it returned None.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "3aef0848d3fb4021a56543d6dc02ff1f"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2025/06/26 18:18:43 WARNING mlflow.metrics.metric_definitions: Failed to load 'toxicity' metric (error: ModuleNotFoundError(\"No module named 'evaluate'\")), skipping metric logging.\n",
            "2025/06/26 18:18:43 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'toxicity' at index 1 in the `extra_metrics` parameter because it returned None.\n",
            "2025/06/26 18:18:43 WARNING mlflow.metrics.metric_definitions: Failed to import textstat for flesch kincaid metric, skipping metric logging. Please install textstat using 'pip install textstat'.\n",
            "2025/06/26 18:18:43 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'flesch_kincaid_grade_level' at index 2 in the `extra_metrics` parameter because it returned None.\n",
            "2025/06/26 18:18:43 WARNING mlflow.metrics.metric_definitions: Failed to import textstat for automated readability index metric, skipping metric logging. Please install textstat using 'pip install textstat'.\n",
            "2025/06/26 18:18:43 WARNING mlflow.models.evaluation.utils.metric: Did not log metric 'ari_grade_level' at index 3 in the `extra_metrics` parameter because it returned None.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/6 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a071ae5df1c1410bb592d7f13ab45690"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Aggregated Metrics:\n",
            "{'latency/mean': np.float64(0.0), 'latency/variance': np.float64(0.0), 'latency/p90': np.float64(0.0), 'exact_match/v1': 0.16666666666666666, 'answer_similarity/v1/mean': np.float64(4.333333333333333), 'answer_similarity/v1/variance': np.float64(2.2222222222222223), 'answer_similarity/v1/p90': np.float64(5.0)}\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "841c3c9f707041de8e77f6c198acf45c"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "49b6c3af203648b1a8cfcc1a21086b97"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "To view the detailed results of our evaluation, we load the saved CSV file into a DataFrame and adjust the display settings to ensure full visibility of each response. This allows us to inspect individual prompts, Gemini-generated predictions, ground truth answers, and the associated metric scores without truncation, which is especially helpful in notebook environments like Colab or Jupyter."
      ],
      "metadata": {
        "id": "NPnzLEXKD3I4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "results = pd.read_csv('gemini_eval_results.csv')\n",
        "pd.set_option('display.max_colwidth', None)\n",
        "results"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 811
        },
        "id": "_91_o2R4dy-G",
        "outputId": "7b81b594-5368-4bfc-cedb-a400ad8aecee"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                                                 inputs  \\\n",
              "0                       Who developed the theory of general relativity?   \n",
              "1        What are the primary functions of the liver in the human body?   \n",
              "2                              Explain what HTTP status code 404 means.   \n",
              "3           What is the boiling point of water at sea level in Celsius?   \n",
              "4                          Name the largest planet in our solar system.   \n",
              "5  What programming language is primarily used for developing iOS apps?   \n",
              "\n",
              "                                                                                                    ground_truth  \\\n",
              "0                                                    Albert Einstein developed the theory of general relativity.   \n",
              "1  The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.   \n",
              "2                                     HTTP 404 means 'Not Found' — the server can't find the requested resource.   \n",
              "3                                                The boiling point of water at sea level is 100 degrees Celsius.   \n",
              "4                                                             Jupiter is the largest planet in our solar system.   \n",
              "5                                        Swift is the primary programming language used for iOS app development.   \n",
              "\n",
              "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    predictions  \\\n",
              "0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Albert Einstein developed the theory of general relativity.   \n",
              "1  The liver performs a vast array of crucial functions in the human body.  These can be broadly categorized as follows:\\n\\n**1. Metabolism:**\\n\\n* **Carbohydrate metabolism:**  The liver regulates blood glucose levels by storing glucose as glycogen (glycogenesis), breaking down glycogen into glucose (glycogenolysis), and producing glucose from non-carbohydrate sources (gluconeogenesis).\\n* **Lipid metabolism:** The liver synthesizes cholesterol and lipoproteins, processes fatty acids, and helps in the breakdown of fats.\\n* **Protein metabolism:** The liver synthesizes many essential proteins, including albumin (important for blood osmotic pressure), clotting factors, and various enzymes. It also deaminates amino acids (removes nitrogen) and converts ammonia (toxic) to urea (less toxic for excretion by the kidneys).\\n\\n**2. Detoxification:**\\n\\n* **Drug metabolism:** The liver breaks down and metabolizes many medications and other substances, making them less harmful or easier to excrete.\\n* **Waste product removal:** The liver processes and eliminates various metabolic waste products, toxins, and drugs from the bloodstream.\\n* **Hormone metabolism:** The liver metabolizes and inactivates many hormones.\\n\\n**3. Excretion:**\\n\\n* **Bile production:** The liver produces bile, a fluid crucial for fat digestion and absorption in the intestines. Bile contains bile salts, bilirubin (a byproduct of hemoglobin breakdown), and cholesterol.  Bilirubin is then excreted in the feces.\\n\\n**4. Storage:**\\n\\n* **Glycogen storage:** As mentioned above, the liver stores glucose as glycogen.\\n* **Vitamin and mineral storage:**  The liver stores vitamins A, D, E, K, and B12, as well as iron.\\n\\n**5. Synthesis:**\\n\\n* **Protein synthesis:** The liver synthesizes many essential proteins vital for various bodily functions.\\n* **Bile synthesis:**  As mentioned above.\\n* **Plasma protein synthesis:**  Including albumin and clotting factors.\\n\\n\\nThese are the primary functions; the liver's roles are interconnected and essential for maintaining overall health and homeostasis.  Damage to the liver can have severe and wide-ranging consequences throughout the body.   \n",
              "2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  HTTP status code 404 means that the server could not find the requested resource.  This is usually what people see when they get a \"Page Not Found\" error on a website.  The client (like a web browser) sent a request for a specific page or file, but the server couldn't locate it at the specified URL.   \n",
              "3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           100 degrees Celsius   \n",
              "4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Jupiter   \n",
              "5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Swift is the primary programming language used for developing iOS apps.  While Objective-C was used extensively in the past, Swift is now Apple's preferred language for iOS, macOS, watchOS, and tvOS development.   \n",
              "\n",
              "   latency  token_count  answer_similarity/v1/score  \\\n",
              "0        0           10                           5   \n",
              "1        0          475                           5   \n",
              "2        0           68                           5   \n",
              "3        0            3                           5   \n",
              "4        0            2                           1   \n",
              "5        0           43                           5   \n",
              "\n",
              "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          answer_similarity/v1/justification  \n",
              "0                                                                                                                                                                                                                                                                                 The provided output exactly matches the target, indicating a perfect semantic similarity. The output and target both state that \"Albert Einstein developed the theory of general relativity,\" demonstrating complete alignment in all significant aspects.  \n",
              "1                                                                                                                                       The model's output closely aligns with the provided targets in all significant aspects. It covers the liver's role in detoxification, protein synthesis, and production of biochemicals necessary for digestion, which are the key points in the targets. Additionally, it provides a comprehensive and detailed explanation of these functions, demonstrating a high degree of semantic similarity.  \n",
              "2  The provided output closely aligns with the target in all significant aspects. It accurately explains that HTTP 404 means 'Not Found' and that it is related to the server not being able to find the requested resource. The output also provides additional context and details, such as the \"Page Not Found\" error and the interaction between the client and the server, which are not explicitly mentioned in the target but are inherently related to the concept. Therefore, it demonstrates a high degree of semantic similarity.  \n",
              "3                                                                                                                                                                                                                                                                             The provided output perfectly aligns with the target. It accurately states that the boiling point of water at sea level is 100 degrees Celsius, which is exactly what the target information conveys. Therefore, it demonstrates complete semantic similarity.  \n",
              "4                                                                                                                                                                             The provided output, \"Jupiter\", has little to no semantic similarity to the provided target. The target provides a detailed description of Jupiter being the largest planet in our solar system, while the output only mentions the name of the planet without any additional context or details. Therefore, it demonstrates little to no semantic similarity.  \n",
              "5                                                                                                                                                                    The provided output closely aligns with the target in all significant aspects. It accurately states that Swift is the primary programming language used for iOS app development, which is the main point in the target. It also adds relevant context about Swift's use in other Apple platforms and its replacement of Objective-C, enhancing the semantic similarity.  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-b909a503-acb5-4139-9d3c-64700085a326\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>inputs</th>\n",
              "      <th>ground_truth</th>\n",
              "      <th>predictions</th>\n",
              "      <th>latency</th>\n",
              "      <th>token_count</th>\n",
              "      <th>answer_similarity/v1/score</th>\n",
              "      <th>answer_similarity/v1/justification</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Who developed the theory of general relativity?</td>\n",
              "      <td>Albert Einstein developed the theory of general relativity.</td>\n",
              "      <td>Albert Einstein developed the theory of general relativity.</td>\n",
              "      <td>0</td>\n",
              "      <td>10</td>\n",
              "      <td>5</td>\n",
              "      <td>The provided output exactly matches the target, indicating a perfect semantic similarity. The output and target both state that \"Albert Einstein developed the theory of general relativity,\" demonstrating complete alignment in all significant aspects.</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>What are the primary functions of the liver in the human body?</td>\n",
              "      <td>The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.</td>\n",
              "      <td>The liver performs a vast array of crucial functions in the human body.  These can be broadly categorized as follows:\\n\\n**1. Metabolism:**\\n\\n* **Carbohydrate metabolism:**  The liver regulates blood glucose levels by storing glucose as glycogen (glycogenesis), breaking down glycogen into glucose (glycogenolysis), and producing glucose from non-carbohydrate sources (gluconeogenesis).\\n* **Lipid metabolism:** The liver synthesizes cholesterol and lipoproteins, processes fatty acids, and helps in the breakdown of fats.\\n* **Protein metabolism:** The liver synthesizes many essential proteins, including albumin (important for blood osmotic pressure), clotting factors, and various enzymes. It also deaminates amino acids (removes nitrogen) and converts ammonia (toxic) to urea (less toxic for excretion by the kidneys).\\n\\n**2. Detoxification:**\\n\\n* **Drug metabolism:** The liver breaks down and metabolizes many medications and other substances, making them less harmful or easier to excrete.\\n* **Waste product removal:** The liver processes and eliminates various metabolic waste products, toxins, and drugs from the bloodstream.\\n* **Hormone metabolism:** The liver metabolizes and inactivates many hormones.\\n\\n**3. Excretion:**\\n\\n* **Bile production:** The liver produces bile, a fluid crucial for fat digestion and absorption in the intestines. Bile contains bile salts, bilirubin (a byproduct of hemoglobin breakdown), and cholesterol.  Bilirubin is then excreted in the feces.\\n\\n**4. Storage:**\\n\\n* **Glycogen storage:** As mentioned above, the liver stores glucose as glycogen.\\n* **Vitamin and mineral storage:**  The liver stores vitamins A, D, E, K, and B12, as well as iron.\\n\\n**5. Synthesis:**\\n\\n* **Protein synthesis:** The liver synthesizes many essential proteins vital for various bodily functions.\\n* **Bile synthesis:**  As mentioned above.\\n* **Plasma protein synthesis:**  Including albumin and clotting factors.\\n\\n\\nThese are the primary functions; the liver's roles are interconnected and essential for maintaining overall health and homeostasis.  Damage to the liver can have severe and wide-ranging consequences throughout the body.</td>\n",
              "      <td>0</td>\n",
              "      <td>475</td>\n",
              "      <td>5</td>\n",
              "      <td>The model's output closely aligns with the provided targets in all significant aspects. It covers the liver's role in detoxification, protein synthesis, and production of biochemicals necessary for digestion, which are the key points in the targets. Additionally, it provides a comprehensive and detailed explanation of these functions, demonstrating a high degree of semantic similarity.</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Explain what HTTP status code 404 means.</td>\n",
              "      <td>HTTP 404 means 'Not Found' — the server can't find the requested resource.</td>\n",
              "      <td>HTTP status code 404 means that the server could not find the requested resource.  This is usually what people see when they get a \"Page Not Found\" error on a website.  The client (like a web browser) sent a request for a specific page or file, but the server couldn't locate it at the specified URL.</td>\n",
              "      <td>0</td>\n",
              "      <td>68</td>\n",
              "      <td>5</td>\n",
              "      <td>The provided output closely aligns with the target in all significant aspects. It accurately explains that HTTP 404 means 'Not Found' and that it is related to the server not being able to find the requested resource. The output also provides additional context and details, such as the \"Page Not Found\" error and the interaction between the client and the server, which are not explicitly mentioned in the target but are inherently related to the concept. Therefore, it demonstrates a high degree of semantic similarity.</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>What is the boiling point of water at sea level in Celsius?</td>\n",
              "      <td>The boiling point of water at sea level is 100 degrees Celsius.</td>\n",
              "      <td>100 degrees Celsius</td>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>5</td>\n",
              "      <td>The provided output perfectly aligns with the target. It accurately states that the boiling point of water at sea level is 100 degrees Celsius, which is exactly what the target information conveys. Therefore, it demonstrates complete semantic similarity.</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Name the largest planet in our solar system.</td>\n",
              "      <td>Jupiter is the largest planet in our solar system.</td>\n",
              "      <td>Jupiter</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>The provided output, \"Jupiter\", has little to no semantic similarity to the provided target. The target provides a detailed description of Jupiter being the largest planet in our solar system, while the output only mentions the name of the planet without any additional context or details. Therefore, it demonstrates little to no semantic similarity.</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>What programming language is primarily used for developing iOS apps?</td>\n",
              "      <td>Swift is the primary programming language used for iOS app development.</td>\n",
              "      <td>Swift is the primary programming language used for developing iOS apps.  While Objective-C was used extensively in the past, Swift is now Apple's preferred language for iOS, macOS, watchOS, and tvOS development.</td>\n",
              "      <td>0</td>\n",
              "      <td>43</td>\n",
              "      <td>5</td>\n",
              "      <td>The provided output closely aligns with the target in all significant aspects. It accurately states that Swift is the primary programming language used for iOS app development, which is the main point in the target. It also adds relevant context about Swift's use in other Apple platforms and its replacement of Objective-C, enhancing the semantic similarity.</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b909a503-acb5-4139-9d3c-64700085a326')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-b909a503-acb5-4139-9d3c-64700085a326 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-b909a503-acb5-4139-9d3c-64700085a326');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-56cb9276-1602-4bb3-a82a-40e338225d67\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-56cb9276-1602-4bb3-a82a-40e338225d67')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-56cb9276-1602-4bb3-a82a-40e338225d67 button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "  <div id=\"id_3fe58b03-084c-4288-9e7d-aa4576bceebc\">\n",
              "    <style>\n",
              "      .colab-df-generate {\n",
              "        background-color: #E8F0FE;\n",
              "        border: none;\n",
              "        border-radius: 50%;\n",
              "        cursor: pointer;\n",
              "        display: none;\n",
              "        fill: #1967D2;\n",
              "        height: 32px;\n",
              "        padding: 0 0 0 0;\n",
              "        width: 32px;\n",
              "      }\n",
              "\n",
              "      .colab-df-generate:hover {\n",
              "        background-color: #E2EBFA;\n",
              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "        fill: #174EA6;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate {\n",
              "        background-color: #3B4455;\n",
              "        fill: #D2E3FC;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate:hover {\n",
              "        background-color: #434B5C;\n",
              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "        fill: #FFFFFF;\n",
              "      }\n",
              "    </style>\n",
              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('results')\"\n",
              "            title=\"Generate code using this dataframe.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "    <script>\n",
              "      (() => {\n",
              "      const buttonEl =\n",
              "        document.querySelector('#id_3fe58b03-084c-4288-9e7d-aa4576bceebc button.colab-df-generate');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      buttonEl.onclick = () => {\n",
              "        google.colab.notebook.generateWithVariable('results');\n",
              "      }\n",
              "      })();\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "results",
              "summary": "{\n  \"name\": \"results\",\n  \"rows\": 6,\n  \"fields\": [\n    {\n      \"column\": \"inputs\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Who developed the theory of general relativity?\",\n          \"What are the primary functions of the liver in the human body?\",\n          \"What programming language is primarily used for developing iOS apps?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ground_truth\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Albert Einstein developed the theory of general relativity.\",\n          \"The liver helps in detoxification, protein synthesis, and production of biochemicals necessary for digestion.\",\n          \"Swift is the primary programming language used for iOS app development.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"predictions\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Albert Einstein developed the theory of general relativity.\",\n          \"The liver performs a vast array of crucial functions in the human body.  These can be broadly categorized as follows:\\n\\n**1. Metabolism:**\\n\\n* **Carbohydrate metabolism:**  The liver regulates blood glucose levels by storing glucose as glycogen (glycogenesis), breaking down glycogen into glucose (glycogenolysis), and producing glucose from non-carbohydrate sources (gluconeogenesis).\\n* **Lipid metabolism:** The liver synthesizes cholesterol and lipoproteins, processes fatty acids, and helps in the breakdown of fats.\\n* **Protein metabolism:** The liver synthesizes many essential proteins, including albumin (important for blood osmotic pressure), clotting factors, and various enzymes. It also deaminates amino acids (removes nitrogen) and converts ammonia (toxic) to urea (less toxic for excretion by the kidneys).\\n\\n**2. Detoxification:**\\n\\n* **Drug metabolism:** The liver breaks down and metabolizes many medications and other substances, making them less harmful or easier to excrete.\\n* **Waste product removal:** The liver processes and eliminates various metabolic waste products, toxins, and drugs from the bloodstream.\\n* **Hormone metabolism:** The liver metabolizes and inactivates many hormones.\\n\\n**3. Excretion:**\\n\\n* **Bile production:** The liver produces bile, a fluid crucial for fat digestion and absorption in the intestines. Bile contains bile salts, bilirubin (a byproduct of hemoglobin breakdown), and cholesterol.  Bilirubin is then excreted in the feces.\\n\\n**4. Storage:**\\n\\n* **Glycogen storage:** As mentioned above, the liver stores glucose as glycogen.\\n* **Vitamin and mineral storage:**  The liver stores vitamins A, D, E, K, and B12, as well as iron.\\n\\n**5. Synthesis:**\\n\\n* **Protein synthesis:** The liver synthesizes many essential proteins vital for various bodily functions.\\n* **Bile synthesis:**  As mentioned above.\\n* **Plasma protein synthesis:**  Including albumin and clotting factors.\\n\\n\\nThese are the primary functions; the liver's roles are interconnected and essential for maintaining overall health and homeostasis.  Damage to the liver can have severe and wide-ranging consequences throughout the body.\",\n          \"Swift is the primary programming language used for developing iOS apps.  While Objective-C was used extensively in the past, Swift is now Apple's preferred language for iOS, macOS, watchOS, and tvOS development.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"latency\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 0,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"token_count\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 185,\n        \"min\": 2,\n        \"max\": 475,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          10\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer_similarity/v1/score\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 1,\n        \"max\": 5,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer_similarity/v1/justification\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"The provided output exactly matches the target, indicating a perfect semantic similarity. The output and target both state that \\\"Albert Einstein developed the theory of general relativity,\\\" demonstrating complete alignment in all significant aspects.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    }
  ]
}